scenario-notebooks/Tools/PerfTools_Log Analytics_CustomTable_Setup.ipynb (727 lines of code) (raw):
{
"cells": [
{
"cell_type": "markdown",
"source": [
"# Performance Tools - Log Analytics Custom Table Setup\n",
"\n",
"__Notebook Version:__ 1.0<br>\n",
"__Python Version:__ Python 3.8<br>\n",
"__Apache Spark Version:__ 3.1<br>\n",
"__Required Packages:__ azure-monitor-query, azure-mgmt-loganalytics<br>\n",
"__Platforms Supported:__ Azure Synapse Analytics\n",
" \n",
"__Data Source Required:__ No \n",
" \n",
"### Description\n",
"This notebook creates data collection endpoint, custom table, and data collect rules for Azure Log Analytics.<br>\n",
"*** Please run the cells sequentially to avoid errors. Please do not use \"run all cells\". *** <br>\n",
"\n",
"## Table of Contents\n",
"1. Warm-up\n",
"2. Azure Authentication\n",
"3. Create Data Collection Endpoint (DCE)\n",
"4. Create Custom Table\n",
"5. Create Data Collection Rule (DCR)"
],
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"## 1. Warm-up"
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"%pip install azure.monitor.query"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"from azure.identity import AzureCliCredential, DefaultAzureCredential, ClientSecretCredential\n",
"from azure.core.exceptions import HttpResponseError \n",
"\n",
"from datetime import datetime, timezone, timedelta\n",
"import json\n",
"from IPython.display import display, HTML, Markdown"
],
"outputs": [],
"execution_count": null,
"metadata": {
"gather": {
"logged": 1690302564605
}
}
},
{
"cell_type": "code",
"source": [
"# User Inputs section 1\r\n",
"tenant_id = \"\"\r\n",
"subscription_id = \"\"\r\n",
"workspace_id = \"\"\r\n",
"\r\n",
"# Azure KV for accessing service principal info\r\n",
"akv_name = \"\"\r\n",
"client_id_name = \"\"\r\n",
"client_secret_name = \"\"\r\n",
"akv_link_name = \"\"\r\n",
"\r\n",
"# User Inputs section 2\r\n",
"# Parameters for provisioning resources\r\n",
"resource_group_name = \"\"\r\n",
"location = \"\"\r\n",
"workspace_name = \"\"\r\n",
"workspace_resource_id = \"/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.OperationalInsights/workspaces/{2}\".format(subscription_id, resource_group_name, workspace_name)\r\n",
"data_collection_endpoint_name = \"\"\r\n",
"data_collection_rule_name = \"\"\r\n",
"custom_table_name = \"\"\r\n",
"custom_table_full_name = \"Custom-\" + custom_table_name"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "markdown",
"source": [
"## 2. Azure Authentication"
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# You may need to change resource_uri for various cloud environments.\r\n",
"resource_uri = \"https://api.loganalytics.io\"\r\n",
"client_id = mssparkutils.credentials.getSecret(akv_name, client_id_name, akv_link_name)\r\n",
"client_secret = mssparkutils.credentials.getSecret(akv_name, client_secret_name, akv_link_name)\r\n",
"\r\n",
"credential = ClientSecretCredential(\r\n",
" tenant_id=tenant_id, \r\n",
" client_id=client_id, \r\n",
" client_secret=client_secret)\r\n",
"access_token = credential.get_token(resource_uri + \"/.default\")\r\n",
"token = access_token[0]"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "markdown",
"source": [
"## 3. Create Data Collection Endpoint (DCE)"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"dce_json_string = \"\"\"\r\n",
"{\r\n",
" \"$schema\": \"https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#\",\r\n",
" \"contentVersion\": \"1.0.0.0\",\r\n",
" \"parameters\": {\r\n",
" \"dataCollectionEndpointName\": {\r\n",
" \"type\": \"string\",\r\n",
" \"metadata\": {\r\n",
" \"description\": \"Specifies the name of the Data Collection Endpoint to create.\"\r\n",
" }\r\n",
" },\r\n",
" \"location\": {\r\n",
" \"type\": \"string\",\r\n",
" \"defaultValue\": \"eastus\",\r\n",
" \"metadata\": {\r\n",
" \"description\": \"Specifies the location for the Data Collection Endpoint.\"\r\n",
" }\r\n",
" }\r\n",
" },\r\n",
" \"resources\": [\r\n",
" {\r\n",
" \"type\": \"Microsoft.Insights/dataCollectionEndpoints\",\r\n",
" \"name\": \"[parameters('dataCollectionEndpointName')]\",\r\n",
" \"location\": \"[parameters('location')]\",\r\n",
" \"apiVersion\": \"2021-04-01\",\r\n",
" \"properties\": {\r\n",
" \"networkAcls\": {\r\n",
" \"publicNetworkAccess\": \"Enabled\"\r\n",
" }\r\n",
" }\r\n",
" }\r\n",
" ],\r\n",
" \"outputs\": {\r\n",
" \"dataCollectionEndpointId\": {\r\n",
" \"type\": \"string\",\r\n",
" \"value\": \"[resourceId('Microsoft.Insights/dataCollectionEndpoints', parameters('dataCollectionEndpointName'))]\"\r\n",
" },\r\n",
" \"endpoint\": {\r\n",
" \"type\": \"object\",\r\n",
" \"value\": \"[reference(resourceId('Microsoft.Insights/dataCollectionEndpoints', parameters('dataCollectionEndpointName'))).logsIngestion]\"\r\n",
" }\r\n",
" }\r\n",
"}\r\n",
"\"\"\""
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1690304357739
}
}
},
{
"cell_type": "code",
"source": [
"from azure.mgmt.resource import ResourceManagementClient\r\n",
"from azure.mgmt.resource.resources.models import DeploymentMode\r\n",
"\r\n",
"resource_client = ResourceManagementClient(credential, subscription_id)\r\n",
"template_body = json.loads(dce_json_string)\r\n",
"\r\n",
"rg_deployment_result = resource_client.deployments.begin_create_or_update(\r\n",
" resource_group_name,\r\n",
" \"exampleDeployment\",\r\n",
" {\r\n",
" \"properties\": {\r\n",
" \"template\": template_body,\r\n",
" \"parameters\": {\r\n",
" \"location\": {\r\n",
" \"value\": location\r\n",
" },\r\n",
" \"dataCollectionEndpointName\": {\r\n",
" \"value\": data_collection_endpoint_name\r\n",
" },\r\n",
" },\r\n",
" \"mode\": DeploymentMode.incremental\r\n",
" }\r\n",
" }\r\n",
")"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1690304363715
}
}
},
{
"cell_type": "code",
"source": [
"dce_res_id = ''\r\n",
"dce_endpoint = ''\r\n",
"if rg_deployment_result.status() != \"Succeeded\":\r\n",
" print(rg_deployment_result.status())\r\n",
" print('Run the cell until stauts=Succeeded or when you see Failed.')\r\n",
"else:\r\n",
" dce_resource_id = rg_deployment_result.result().properties.outputs[\"dataCollectionEndpointId\"].get(\"value\")\r\n",
" dce_endpoint = rg_deployment_result.result().properties.outputs[\"endpoint\"].get(\"value\")['endpoint']\r\n",
" print('You will need DCE Endpoint for future data ingestion!')\r\n",
" print('DCE Endpoint: ' + dce_endpoint)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1690305286390
}
}
},
{
"cell_type": "markdown",
"source": [
"## 4. Create Custom Table"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Please replace columns info with your own columns\r\n",
"cus_table_json_string = \"\"\"\r\n",
"{\r\n",
" \"$schema\": \"https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#\",\r\n",
" \"contentVersion\": \"1.0.0.0\",\r\n",
" \"parameters\": {\r\n",
" \"workspaceName\": {\r\n",
" \"type\": \"string\",\r\n",
" \"metadata\": {\r\n",
" \"description\": \"LA workspace name.\"\r\n",
" }\r\n",
" },\r\n",
" \"customTableName\": {\r\n",
" \"type\": \"string\",\r\n",
" \"metadata\": {\r\n",
" \"description\": \"table name.\"\r\n",
" }\r\n",
" }\r\n",
" },\r\n",
" \"resources\": [\r\n",
" {\r\n",
" \"type\": \"Microsoft.OperationalInsights/workspaces/tables\",\r\n",
" \"apiVersion\": \"2021-12-01-preview\",\r\n",
" \"name\": \"[concat(parameters('workspaceName'), '/', parameters('customTableName'))]\",\r\n",
" \"kind\": \"CustomLog\",\r\n",
" \"properties\": {\r\n",
" \"totalRetentionInDays\": 90,\r\n",
" \"plan\": \"Analytics\",\r\n",
" \"schema\": {\r\n",
" \"name\": \"[parameters('customTableName')]\",\r\n",
" \"columns\": [\r\n",
" {\r\n",
" \"name\": \"TimeGenerated\",\r\n",
" \"type\": \"datetime\"\r\n",
" },\r\n",
" {\r\n",
" \"name\": \"TimeInSeconds\",\r\n",
" \"type\": \"real\"\r\n",
" },\r\n",
" {\r\n",
" \"name\": \"QueryBody\",\r\n",
" \"type\": \"string\"\r\n",
" }\r\n",
" ]\r\n",
" },\r\n",
" \"retentionInDays\": 90\r\n",
" }\r\n",
" }\r\n",
" ],\r\n",
" \"outputs\": {\r\n",
" \"streamName\": {\r\n",
" \"type\": \"string\",\r\n",
" \"value\": \"[concat('Custom-', parameters('customTableName'))]\"\r\n",
" }\r\n",
" }\r\n",
"}\r\n",
"\"\"\""
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1690306156609
}
}
},
{
"cell_type": "code",
"source": [
"from azure.mgmt.resource import ResourceManagementClient\r\n",
"from azure.mgmt.resource.resources.models import DeploymentMode\r\n",
"\r\n",
"resource_client = ResourceManagementClient(credential, subscription_id)\r\n",
"template_body = json.loads(cus_table_json_string)\r\n",
"table_tag = \"defaultct\"\r\n",
"\r\n",
"rg_deployment_result = resource_client.deployments.begin_create_or_update(\r\n",
" resource_group_name,\r\n",
" table_tag,\r\n",
" {\r\n",
" \"properties\": {\r\n",
" \"template\": template_body,\r\n",
" \"parameters\": {\r\n",
" \"workspaceName\": {\r\n",
" \"value\": workspace_name\r\n",
" },\r\n",
" \"customTableName\": {\r\n",
" \"value\": custom_table_name\r\n",
" }\r\n",
" },\r\n",
" \"mode\": DeploymentMode.incremental\r\n",
" }\r\n",
" }\r\n",
")"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1690306163834
}
}
},
{
"cell_type": "code",
"source": [
"stream_name = ''\r\n",
"if rg_deployment_result.status() != \"Succeeded\":\r\n",
" print(rg_deployment_result.status())\r\n",
" print('Run the cell until stauts=Succeeded or when you see Failed.')\r\n",
"else:\r\n",
" stream_name = rg_deployment_result.result().properties.outputs[\"streamName\"].get(\"value\")\r\n",
" print('You will need full stream name for future data ingestion!')\r\n",
" print('Stream Name: ' + stream_name)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1690306331174
}
}
},
{
"cell_type": "markdown",
"source": [
"## 5. Create Data Collection Rule (DCR)"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Please replace columns info with your own columns\r\n",
"dcr_json_string = \"\"\"\r\n",
"{\r\n",
" \"$schema\": \"https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#\",\r\n",
" \"contentVersion\": \"1.0.0.0\",\r\n",
" \"parameters\": {\r\n",
" \"dataCollectionRuleName\": {\r\n",
" \"type\": \"string\",\r\n",
" \"metadata\": {\r\n",
" \"description\": \"Specifies the name of the Data Collection Rule to create.\"\r\n",
" }\r\n",
" },\r\n",
" \"location\": {\r\n",
" \"type\": \"string\",\r\n",
" \"metadata\": {\r\n",
" \"description\": \"Specifies the location in which to create the Data Collection Rule.\"\r\n",
" }\r\n",
" },\r\n",
" \"workspaceResourceId\": {\r\n",
" \"type\": \"string\",\r\n",
" \"metadata\": {\r\n",
" \"description\": \"Specifies the Azure resource ID of the Log Analytics workspace to use.\"\r\n",
" }\r\n",
" },\r\n",
" \"workspaceName\": {\r\n",
" \"type\": \"string\",\r\n",
" \"metadata\": {\r\n",
" \"description\": \"LA workspace name.\"\r\n",
" }\r\n",
" },\r\n",
" \"endpointResourceId\": {\r\n",
" \"type\": \"string\",\r\n",
" \"metadata\": {\r\n",
" \"description\": \"Specifies the Azure resource ID of the Data Collection Endpoint to use.\"\r\n",
" }\r\n",
" },\r\n",
" \"customTableFullName\": {\r\n",
" \"type\": \"string\",\r\n",
" \"metadata\": {\r\n",
" \"description\": \"table name.\"\r\n",
" }\r\n",
" }\r\n",
" },\r\n",
" \"resources\": [\r\n",
" {\r\n",
" \"type\": \"Microsoft.Insights/dataCollectionRules\",\r\n",
" \"name\": \"[parameters('dataCollectionRuleName')]\",\r\n",
" \"location\": \"[parameters('location')]\",\r\n",
" \"apiVersion\": \"2021-09-01-preview\",\r\n",
" \"properties\": {\r\n",
" \"dataCollectionEndpointId\": \"[parameters('endpointResourceId')]\",\r\n",
" \"streamDeclarations\": {\r\n",
" \"[parameters('customTableFullName')]\": {\r\n",
" \"columns\": [\r\n",
" {\r\n",
" \"name\": \"TimeGenerated\",\r\n",
" \"type\": \"datetime\"\r\n",
" },\r\n",
" {\r\n",
" \"name\": \"TimeInSeconds\",\r\n",
" \"type\": \"real\"\r\n",
" },\r\n",
" {\r\n",
" \"name\": \"QueryBody\",\r\n",
" \"type\": \"string\"\r\n",
" }\r\n",
" ]\r\n",
" }\r\n",
" },\r\n",
" \"destinations\": {\r\n",
" \"logAnalytics\": [\r\n",
" {\r\n",
" \"workspaceResourceId\": \"[parameters('workspaceResourceId')]\",\r\n",
" \"name\": \"[parameters('workspaceName')]\"\r\n",
" }\r\n",
" ]\r\n",
" },\r\n",
" \"dataFlows\": [\r\n",
" {\r\n",
" \"streams\": [\r\n",
" \"[parameters('customTableFullName')]\"\r\n",
" ],\r\n",
" \"destinations\": [\r\n",
" \"[parameters('workspaceName')]\"\r\n",
" ],\r\n",
" \"transformKql\": \"source\",\r\n",
" \"outputStream\": \"[parameters('customTableFullName')]\"\r\n",
" }\r\n",
" ]\r\n",
" }\r\n",
" }\r\n",
" ],\r\n",
" \"outputs\": {\r\n",
" \"dataCollectionRuleId\": {\r\n",
" \"type\": \"string\",\r\n",
" \"value\": \"[resourceId('Microsoft.Insights/dataCollectionRules', parameters('dataCollectionRuleName'))]\"\r\n",
" },\r\n",
" \"immutableId\": {\r\n",
" \"type\": \"string\",\r\n",
" \"value\": \"[reference(resourceId('Microsoft.Insights/dataCollectionRules', parameters('dataCollectionRuleName'))).immutableId]\"\r\n",
" }\r\n",
" }\r\n",
"}\r\n",
"\"\"\""
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1690307597100
}
}
},
{
"cell_type": "code",
"source": [
"from azure.mgmt.resource import ResourceManagementClient\r\n",
"from azure.mgmt.resource.resources.models import DeploymentMode\r\n",
"\r\n",
"resource_client = ResourceManagementClient(credential, subscription_id)\r\n",
"\r\n",
"template_body = json.loads(dcr_json_string)\r\n",
"dce_resource_id = '/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.Insights/dataCollectionEndpoints/{2}'.format(subscription_id, resource_group_name, data_collection_endpoint_name)\r\n",
"endpoint_resource_id = dce_resource_id\r\n",
"tag_name = \"defaultdcr\"\r\n",
"\r\n",
"rg_deployment_result = resource_client.deployments.begin_create_or_update(\r\n",
" resource_group_name,\r\n",
" tag_name,\r\n",
" {\r\n",
" \"properties\": {\r\n",
" \"template\": template_body,\r\n",
" \"parameters\": {\r\n",
" \"location\": {\r\n",
" \"value\": location\r\n",
" },\r\n",
" \"dataCollectionRuleName\": {\r\n",
" \"value\": data_collection_rule_name\r\n",
" },\r\n",
" \"workspaceResourceId\": {\r\n",
" \"value\": workspace_resource_id\r\n",
" },\r\n",
" \"workspaceName\": {\r\n",
" \"value\": workspace_name\r\n",
" },\r\n",
" \"endpointResourceId\": {\r\n",
" \"value\": endpoint_resource_id\r\n",
" },\r\n",
" \"customTableFullName\": {\r\n",
" \"value\": custom_table_full_name\r\n",
" }\r\n",
" },\r\n",
" \"mode\": DeploymentMode.incremental\r\n",
" }\r\n",
" }\r\n",
")"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1690307603837
}
}
},
{
"cell_type": "code",
"source": [
"immutable_id = ''\r\n",
"if rg_deployment_result.status() != \"Succeeded\":\r\n",
" print(rg_deployment_result.status())\r\n",
" print('Run the cell until stauts=Succeeded or when you see Failed.')\r\n",
"else:\r\n",
" immutable_id = rg_deployment_result.result().properties.outputs[\"immutableId\"].get(\"value\")\r\n",
" print('You will need DCR Immutable Id for future data ingestion!')\r\n",
" print('DCR Immutable Id: ' + immutable_id)\r\n"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1690307790923
}
}
},
{
"cell_type": "markdown",
"source": [
"* Once finished everything, make sure adding the Entra ID to the new DCR as a Monitoring Metrics Publisher"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
}
],
"metadata": {
"kernelspec": {
"name": "synapse_pyspark",
"display_name": "Synapse PySpark"
},
"language_info": {
"name": "python"
},
"description": null,
"save_output": true,
"synapse_widget": {
"version": "0.1",
"state": {}
}
},
"nbformat": 4,
"nbformat_minor": 2
}